rm(list=ls())
## Set working directory
setwd("/Users/afanlo/Documents/Projects/Kroenig Rep")
library(dplyr)
library(MASS)
library(xtable)
library(stargazer)
library(lmtest)
library(sandwich)

## Upload Kroenig Data 
cutoff_df<-read.csv("DF_WithCutoffs.csv")
cutoff_df<-cutoff_df[,c(3:9, 29:148)]
dataframe<-read.csv("data_may2020.csv")
df<-merge(dataframe, cutoff_df, by=c("ccode", "ccode2", "crisno", "year", "crisname",
                                     "abbrev1", "abbrev2"))

##Changing back to ICB coding for Korean War
df$victory<-ifelse(df$crisno==133 & df$ccode==365,  1, df$victory)

## Changing back to ICB Coding of 1969 Sino-Soviet Border War
df$victory<-ifelse(df$crisno==231, 0, df$victory)

df$inferiority<-ifelse(df$superiority==0, 1, 0)

### Creating a function to compute clustered standard errors
vcovCluster <- function(
    model,
    cluster
)
{
  require(sandwich)
  require(lmtest)
  
  cluster = as.factor(cluster)
  
  if (!is.null(model$na.action)){
    omit.rows = model$na.action
    cluster = cluster[-omit.rows]
  }
  if (nrow(model.matrix(model)) != length(cluster)){
    stop("something's not working: cluster variable has different N than model")
  }
  M <- length(unique(cluster))
  N <- length(cluster)           
  K <- model$rank   
  
  if(M<50){
    warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap or randomization inference instead).")
  }
  
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- na.omit(apply(estfun(model), 2, function(x) tapply(x, cluster, sum)))
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  return(rcse.cov)
}


## Models Begin

## 29 1s

logit_1<-glm(data=df, victory~superiority+proximity + high_stakes + high_stakes_opponent + polity21 + 
               capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1<-coeftest(logit_1, vcov=vcovCluster(model=logit_1, cluster=factor(df$crisisdyad)))


logit_1.15<-glm(data=df, victory~onep15cut+op15_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1.15<-coeftest(logit_1.15, vcov=vcovCluster(model=logit_1.15, cluster=factor(df$crisisdyad)))

### 27 1s
logit_1.2<-glm(data=df, victory~onep2cut+onep2_par+ proximity + high_stakes + high_stakes_opponent + polity21 + 
                 capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1.2<-coeftest(logit_1.2, vcov=vcovCluster(model=logit_1.2, cluster=factor(df$crisisdyad)))

#### 26 1s
logit_1.45<-glm(data=df, victory~onep45cut+op45_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1.45<-coeftest(logit_1.45, vcov=vcovCluster(model=logit_1.45, cluster=factor(df$crisisdyad)))

## 25 1s
logit_1.55<-glm(data=df, victory~onep55cut+op55_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1.55<-coeftest(logit_1.55, vcov=vcovCluster(model=logit_1.55, cluster=factor(df$crisisdyad)))


## 24 1s
logit_1.65<-glm(data=df, victory~onep65cut+op65_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1.65<-coeftest(logit_1.65, vcov=vcovCluster(model=logit_1.65, cluster=factor(df$crisisdyad)))

## 23 1s
logit_1.85<-glm(data=df, victory~onep85cut+op85_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1.85<-coeftest(logit_1.85, vcov=vcovCluster(model=logit_1.85, cluster=factor(df$crisisdyad)))


## 22 1s
logit_2<-glm(data=df, victory~twocut+two_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
               capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test2<-coeftest(logit_2, vcov=vcovCluster(model=logit_2, cluster=factor(df$crisisdyad)))


### 21 1s
logit_2.5<-glm(data=df, victory~twop5cut+tp5_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                 capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test2.5<-coeftest(logit_2.5, vcov=vcovCluster(model=logit_2.5, cluster=factor(df$crisisdyad)))


### 20 1s
logit_3<-glm(data=df, victory~threecut+three_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
               capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test3<-coeftest(logit_3, vcov=vcovCluster(model=logit_3, cluster=factor(df$crisisdyad)))


### 19 1s
logit_3.4<-glm(data=df, victory~threep4cut+threep4_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                 capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test3.4<-coeftest(logit_3.4, vcov=vcovCluster(model=logit_3.4, cluster=factor(df$crisisdyad)))

### 18 1s
logit_3.5<-glm(data=df, victory~threep5cut+threep5_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                 capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test3.5<-coeftest(logit_3.5, vcov=vcovCluster(model=logit_3.5, cluster=factor(df$crisisdyad)))


### 17 1s 
logit_4<-glm(data=df, victory~fourcut+four_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
               capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test4<-coeftest(logit_4, vcov=vcovCluster(model=logit_4, cluster=factor(df$crisisdyad)))

### 16 1s
logit_7<-glm(data=df, victory~sevencut+seven_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
               capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test7<-coeftest(logit_7, vcov=vcovCluster(model=logit_7, cluster=factor(df$crisisdyad)))

### 15 1s
logit_9<-glm(data=df, victory~ninecut+nine_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
               capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test9<-coeftest(logit_9, vcov=vcovCluster(model=logit_9, cluster=factor(df$crisisdyad)))

### 14 1s
logit_10<-glm(data=df, victory~tencut+ten_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test10<-coeftest(logit_10, vcov=vcovCluster(model=logit_10, cluster=factor(df$crisisdyad)))

## 13 1s

logit_11<-glm(data=df, victory~elevencut+elevencut_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test11<-coeftest(logit_11, vcov=vcovCluster(model=logit_11, cluster=factor(df$crisisdyad)))


### 12 1s
logit_20<-glm(data=df, victory~twentycut+twenty_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test20<-coeftest(logit_20, vcov=vcovCluster(model=logit_20, cluster=factor(df$crisisdyad)))

### 11 1s
logit_28<-glm(data=df, victory~twentyeightcut+twentyeight_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test28<-coeftest(logit_28, vcov=vcovCluster(model=logit_28, cluster=factor(df$crisisdyad)))


### 10 1s
logit_30<-glm(data=df, victory~thirtycut+thirty_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test30<-coeftest(logit_30, vcov=vcovCluster(model=logit_30, cluster=factor(df$crisisdyad)))

### 9 1s
logit_40<-glm(data=df, victory~fortycut+forty_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test40<-coeftest(logit_40, vcov=vcovCluster(model=logit_40, cluster=factor(df$crisisdyad)))

### 8 1s
logit_50<-glm(data=df, victory~fiftycut+fifty_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test50<-coeftest(logit_50, vcov=vcovCluster(model=logit_50, cluster=factor(df$crisisdyad)))

### 7 1s
logit_100<-glm(data=df, victory~hundredcut+hundred_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                 capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test100<-coeftest(logit_100, vcov=vcovCluster(model=logit_100, cluster=factor(df$crisisdyad)))

### 6 1s 
logit_500<-glm(data=df, victory~fivhuncut+fivhun_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                 capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test500<-coeftest(logit_500, vcov=vcovCluster(model=logit_500, cluster=factor(df$crisisdyad)))

### 5 1s 

logit_700<-glm(data=df, victory~sevenhuncut+sevenhuncut_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                 capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test700<-coeftest(logit_700, vcov=vcovCluster(model=logit_700, cluster=factor(df$crisisdyad)))

### 4 1s 
logit_1000<-glm(data=df, victory~thoucut+thou_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1000<-coeftest(logit_1000, vcov=vcovCluster(model=logit_1000, cluster=factor(df$crisisdyad)))


# 3 1s
logit_1300<-glm(data=df, victory~thirhuncut+thirhun_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1300<-coeftest(logit_1300, vcov=vcovCluster(model=logit_1300, cluster=factor(df$crisisdyad)))

#2 1s
logit_1500<-glm(data=df, victory~fifthuncut+fifthun_par+proximity + high_stakes + high_stakes_opponent + polity21 + 
                  capshare + strikea +  tpop_1 + viol + cris_ave, family=binomial(link="logit"))
test1500<-coeftest(logit_1500, vcov=vcovCluster(model=logit_1500, cluster=factor(df$crisisdyad)))



## Preparing Data Frames for First Differences

superior<-as.data.frame(t(as.matrix(c(1, mean(df$proximity), mean(df$high_stakes), mean(df$high_stakes_opponent), mean(df$polity21),
                                      mean(df$capshare), mean(df$strikea), mean(df$tpop_1), 
                                      mean(df$viol), mean(df$cris_ave)))))
colnames(superior)<-c("superiority", "proximity", "high_stakes", "high_stakes_opponent",
                      "polity21", "capshare", "strikea", "tpop_1",
                      "viol", "cris_ave")

inferior<-as.data.frame(t(as.matrix(c(0,  mean(df$proximity), mean(df$high_stakes), mean(df$high_stakes_opponent), mean(df$polity21),
                                      mean(df$capshare), mean(df$strikea), mean(df$tpop_1), 
                                      mean(df$viol), mean(df$cris_ave)))))
colnames(inferior)<-c("superiority", "proximity", "high_stakes", "high_stakes_opponent",
                      "polity21", "capshare", "strikea", "tpop_1",
                      "viol", "cris_ave")


######

superior1<-as.data.frame(t(as.matrix(c(1, 0, mean(df$proximity), mean(df$high_stakes), mean(df$high_stakes_opponent), mean(df$polity21),
                                       mean(df$capshare), mean(df$strikea), mean(df$tpop_1), 
                                       mean(df$viol), mean(df$cris_ave)))))
colnames(superior1)<-c("superiority", "parity", "proximity", "high_stakes", "high_stakes_opponent",
                       "polity21", "capshare", "strikea", "tpop_1",
                       "viol", "cris_ave")

inferior1<-as.data.frame(t(as.matrix(c(0, 0, mean(df$proximity), mean(df$high_stakes), mean(df$high_stakes_opponent), mean(df$polity21),
                                       mean(df$capshare), mean(df$strikea), mean(df$tpop_1), 
                                       mean(df$viol), mean(df$cris_ave)))))
colnames(inferior1)<-c("superiority", "parity", "proximity", "high_stakes", "high_stakes_opponent",
                       "polity21", "capshare", "strikea", "tpop_1",
                       "viol", "cris_ave")

first_diffs<-matrix(NA, 28, 8)
first_diffs[,1]<-c("logit_1", "logit_1.15", "logit_1.2",
                   "logit_1.45", "logit_1.55", "logit_1.65", "logit_1.85",
                   "logit_2", "logit_2.5", "logit_3", "logit_3.4", "logit_3.5",
                   "logit_4", "logit_7", 
                   "logit_9", "logit_10", "logit_11", "logit_20", "logit_28", "logit_30", 
                   "logit_40",   "logit_50",  
                   "logit_100", "logit_500", "logit_700", "logit_1000", "logit_1300",
                   "logit_1500")
first_diffs[,2]<-seq(from=29, to=2, by=-1)
first_diffs[,3]<-c(names(logit_1$coefficients[2]), names(logit_1.15$coefficients[2]), names(logit_1.2$coefficients[2]),
                   names(logit_1.45$coefficients[2]), names(logit_1.55$coefficients[2]), names(logit_1.65$coefficients[2]), names(logit_1.85$coefficients[2]),
                   names(logit_2$coefficients[2]), names(logit_2.5$coefficients[2]), names(logit_3$coefficients[2]), names(logit_3.4$coefficients[2]), 
                   names(logit_3.5$coefficients[2]),
                   names(logit_4$coefficients[2]), names(logit_7$coefficients[2]), 
                   names(logit_9$coefficients[2]), names(logit_10$coefficients[2]), names(logit_11$coefficients[2]), names(logit_20$coefficients[2]), names(logit_28$coefficients[2]), 
                   names(logit_30$coefficients[2]), 
                   names(logit_40$coefficients[2]),   names(logit_50$coefficients[2]),  
                   names(logit_100$coefficients[2]), names(logit_500$coefficients[2]), names(logit_700$coefficients[2]), names(logit_1000$coefficients[2]), names(logit_1300$coefficients[2]),
                   names(logit_1500$coefficients[2]))  

first_diffs[,4]<-c(NA, names(logit_1.15$coefficients[3]), names(logit_1.2$coefficients[3]),
                   names(logit_1.45$coefficients[3]), names(logit_1.55$coefficients[3]), names(logit_1.65$coefficients[3]), names(logit_1.85$coefficients[3]),
                   names(logit_2$coefficients[3]), names(logit_2.5$coefficients[3]), names(logit_3$coefficients[3]), names(logit_3.4$coefficients[3]), 
                   names(logit_3.5$coefficients[3]),
                   names(logit_4$coefficients[3]), names(logit_7$coefficients[3]), 
                   names(logit_9$coefficients[3]), names(logit_10$coefficients[3]), names(logit_11$coefficients[3]), names(logit_20$coefficients[3]), names(logit_28$coefficients[3]), 
                   names(logit_30$coefficients[3]), 
                   names(logit_40$coefficients[3]),   names(logit_50$coefficients[3]),  
                   names(logit_100$coefficients[3]), names(logit_500$coefficients[3]), names(logit_700$coefficients[3]), names(logit_1000$coefficients[3]), names(logit_1300$coefficients[3]),
                   names(logit_1500$coefficients[3]))  

mods<-list(logit_1, logit_1.15, logit_1.2,
           logit_1.45, logit_1.55, logit_1.65, logit_1.85,
           logit_2, logit_2.5, logit_3, logit_3.4, logit_3.5,
           logit_4, logit_7, 
           logit_9, logit_10, logit_11, logit_20, logit_28, logit_30, 
           logit_40,   logit_50,  
           logit_100, logit_500, logit_700, logit_1000, logit_1300,
           logit_1500)

for (i in 2:28){
  sup_new<-superior1
  colnames(sup_new)<-c(first_diffs[i,3], first_diffs[i, 4], colnames(superior1)[3:11]) 
  inf_new<-inferior1
  colnames(inf_new)<-c(first_diffs[i,3], first_diffs[i, 4], colnames(inferior1)[3:11]) 
  mod_new<-mods[[i]]
  first_diffs[i,5]<-predict(mod_new, sup_new, type="response")
  first_diffs[i,6]<-predict(mod_new, inf_new, type="response")
  sup_new<-NA
  inf_new<-NA
}

first_diffs[1,5]<-predict(logit_1, superior, type="response")
first_diffs[1,6]<-predict(logit_1, inferior, type="response")

first_diffs[,7]<-as.numeric(first_diffs[,5])-as.numeric(first_diffs[,6])

## Determining Which Coefs on Superiority are Significant, According to Cluster-Robust SEs
sig_sup<-matrix(NA, 28, 4)
sig_sup[,1]<-c("logit_1", "logit_1.15", "logit_1.2",
              "logit_1.45", "logit_1.55", "logit_1.65", "logit_1.85",
              "logit_2", "logit_2.5", "logit_3", "logit_3.4", "logit_3.5",
              "logit_4", "logit_7", 
              "logit_9", "logit_10", "logit_11", "logit_20", "logit_28", "logit_30", 
              "logit_40",   "logit_50",  
              "logit_100", "logit_500", "logit_700", "logit_1000", "logit_1300",
              "logit_1500")
sig_sup[,2]<-c(test1[2,4], test1.15[2,4], test1.2[2,4],
              test1.45[2,4], test1.55[2,4], test1.65[2,4], test1.85[2,4],
              test2[2,4], test2.5[2,4], test3[2,4], test3.4[2,4], test3.5[2,4],
              test4[2,4], test7[2,4], 
              test9[2,4], test10[2,4], test11[2,4], test20[2,4], test28[2,4], test30[2,4], 
              test40[2,4],   test50[2,4],  
              test100[2,4], test500[2,4], test700[2,4], test1000[2,4], test1300[2,4],
              test1500[2,4])
sig_sup<-as.data.frame(sig_sup)
sig_sup[,2]<-as.numeric(sig_sup[,2])
sig_sup[,3]<-ifelse(sig_sup[,2]<0.1, 1, 0)
sig_sup[,4]<-seq(1, 28, 1)

#### MAKING FIGURE 3 from Appendix B - Plot of First Differences  using cluster-robust standard errors 
dev.off()
par(mar=c(8.1, 4.1, 4.1, 4.1), xpd=T)
plot(first_diffs[,7], ylim=c(-1,1), xaxt="n", xlab=c("Nuclear Ratio: Superiority Threshold"), 
     ylab=c("First Difference"), main=c("Difference in Prob. of Victory: Superior vs. Inferior"))
axis(1, at=1:28, labels=c("1", "1.15", "1.2",
                          "1.45", "1.55", "1.65", "1.85",
                          "2", "2.5", "3", "3.4", "3.5",
                          "4", "7", 
                          "9", "10", "11", "20", "28", "30", 
                          "40",   "50",  
                          "100", "500", "700", "1000", "1300",
                          "1500"), las=2)
legend("bottomright", inset=c(0, -0.25),legend=c("Significant Effect, Cluster-Robust"), pch=19, 
       cex=0.5, pt.cex=1)
abline(h=0, xpd=F)

first_diffs[,8]<-seq(1,28)
points(first_diffs[1,8],first_diffs[1,7] ,  pch=19)
points(first_diffs[2,8],first_diffs[2,7] ,  pch=19)
points(first_diffs[3,8],first_diffs[3,7] ,  pch=19)
points(first_diffs[4,8],first_diffs[4,7] ,  pch=19)
points(first_diffs[5,8],first_diffs[5,7] ,  pch=19)
points(first_diffs[6,8],first_diffs[6,7] ,  pch=19)



